#install.packages("plotly")
library(Biobase)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(NMF)
## Loading required package: registry
## Loading required package: rngtools
## Loading required package: cluster
## NMF - BioConductor layer [OK] | Shared memory capabilities [NO: bigmemory] | Cores 7/8
## To enable shared memory capabilities, try: install.extras('
## NMF
## ')
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ✔ purrr 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
library("plyr")
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
##
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
library("dplyr")
load('/Users/candiceyu/Documents/PSTAT296B/Code/nmfResults296_20230216.RData')
snmfl$fit$`15`
## <Object of class: NMFfitX1 >
## Method: snmf/l
## Runs: 100
## RNG:
## 10407L, 897065501L, 218876474L, -705699213L, 1644899608L, 1096111161L, -86747482L
## Total timing:
## user system elapsed
## 1.41 0.03 48.99
# Threshold greater than 0.8
Features_Num_0.8 <- extractFeatures(snmfl$fit$`15`, 0.8)
# Threshold greater between 0.6 to 0.8
Features_Num_0.6 <- extractFeatures(snmfl$fit$`15`, 0.6)
Features_Num_0.6_0.8 <- Features_Num_0.6
for (i in 1:15){
SameIndex <- !(Features_Num_0.6[[i]] %in% Features_Num_0.8[[i]])
Features_Num_0.6_0.8[[i]]<- Features_Num_0.6[[i]][SameIndex]
}
# Threshold greater between 0.4 to 0.6
Features_Num_0.4 <- extractFeatures(snmfl$fit$`15`, 0.4)
Features_Num_0.4_0.6 <- Features_Num_0.4
for (i in 1:15){
SameIndex <- !(Features_Num_0.4[[i]] %in% Features_Num_0.6[[i]])
Features_Num_0.4_0.6[[i]]<- Features_Num_0.4[[i]][SameIndex]
}
# Threshold greater between 0.2 to 0.4
Features_Num_0.2 <- extractFeatures(snmfl$fit$`15`, 0.2)
Features_Num_0.2_0.4 <- Features_Num_0.2
for (i in 1:15){
SameIndex <- !(Features_Num_0.2[[i]] %in% Features_Num_0.4[[i]])
Features_Num_0.2_0.4[[i]]<- Features_Num_0.2[[i]][SameIndex]
}
# Threshold greater between 0 to 0.2
Features_Num_0.0 <- extractFeatures(snmfl$fit$`15`, 0.0)
Features_Num_0.0_0.2 <- Features_Num_0.0
for (i in 1:15){
SameIndex <- !(Features_Num_0.0[[i]] %in% Features_Num_0.2[[i]])
Features_Num_0.0_0.2[[i]]<- Features_Num_0.0[[i]][SameIndex]
}
basis_df <- data.frame(basis(snmfl$fit$`15`))
basis_df <- cbind(rownames(basis_df), basis_df)
rownames(basis_df) <- NULL
colnames(basis_df)<- c("Benefit","Basis_1", "Basis_2", "Basis_3", "Basis_4", "Basis_5",
"Basis_6", "Basis_7", "Basis_8", "Basis_9", "Basis_10", "Basis_11",
"Basis_12", "Basis_13" ,"Basis_14", "Basis_15")
# Create Threshold greater than 0.8
Features_0.8 <- combine(basis_df[Features_Num_0.8[1][[1]], c(1,2)],
basis_df[Features_Num_0.8[2][[1]], c(1,3)],
basis_df[Features_Num_0.8[3][[1]], c(1,4)],
basis_df[Features_Num_0.8[4][[1]], c(1,5)],
basis_df[Features_Num_0.8[5][[1]], c(1,6)],
basis_df[Features_Num_0.8[6][[1]], c(1,7)],
basis_df[Features_Num_0.8[7][[1]], c(1,8)],
basis_df[Features_Num_0.8[8][[1]], c(1,9)],
basis_df[Features_Num_0.8[9][[1]], c(1,10)],
basis_df[Features_Num_0.8[10][[1]], c(1,11)],
basis_df[Features_Num_0.8[11][[1]], c(1,12)],
basis_df[Features_Num_0.8[12][[1]], c(1,13)],
basis_df[Features_Num_0.8[13][[1]], c(1,14)],
basis_df[Features_Num_0.8[14][[1]], c(1,15)],
basis_df[Features_Num_0.8[15][[1]], c(1,16)])
## Warning: `combine()` was deprecated in dplyr 1.0.0.
## ℹ Please use `vctrs::vec_c()` instead.
Features_0.8_long <- Features_0.8 %>%
pivot_longer(cols = 2:16)
Features_0.8_long <- Features_0.8_long[!(is.na(Features_0.8_long$value)), ]
Features_0.8_long$Threshold <- "0.8"
# Threshold greater between 0.6 to 0.8
Features_0.6_0.8 <- combine(basis_df[Features_Num_0.6_0.8[1][[1]], c(1,2)],
basis_df[Features_Num_0.6_0.8[2][[1]], c(1,3)],
basis_df[Features_Num_0.6_0.8[3][[1]], c(1,4)],
basis_df[Features_Num_0.6_0.8[4][[1]], c(1,5)],
basis_df[Features_Num_0.6_0.8[5][[1]], c(1,6)],
basis_df[Features_Num_0.6_0.8[6][[1]], c(1,7)],
basis_df[Features_Num_0.6_0.8[7][[1]], c(1,8)],
basis_df[Features_Num_0.6_0.8[8][[1]], c(1,9)],
basis_df[Features_Num_0.6_0.8[9][[1]], c(1,10)],
basis_df[Features_Num_0.6_0.8[10][[1]], c(1,11)],
basis_df[Features_Num_0.6_0.8[11][[1]], c(1,12)],
basis_df[Features_Num_0.6_0.8[12][[1]], c(1,13)],
basis_df[Features_Num_0.6_0.8[13][[1]], c(1,14)],
basis_df[Features_Num_0.6_0.8[14][[1]], c(1,15)],
basis_df[Features_Num_0.6_0.8[15][[1]], c(1,16)])
Features_0.6_0.8_long <- Features_0.6_0.8 %>%
pivot_longer(cols = 2:16)
Features_0.6_0.8_long <- Features_0.6_0.8_long[!(is.na(Features_0.6_0.8_long$value)), ]
Features_0.6_0.8_long$Threshold <- "0.6_0.8"
# Threshold greater between 0.4 to 0.6
Features_0.4_0.6 <- combine(basis_df[Features_Num_0.4_0.6[1][[1]], c(1,2)],
basis_df[Features_Num_0.4_0.6[2][[1]], c(1,3)],
basis_df[Features_Num_0.4_0.6[3][[1]], c(1,4)],
basis_df[Features_Num_0.4_0.6[4][[1]], c(1,5)],
basis_df[Features_Num_0.4_0.6[5][[1]], c(1,6)],
basis_df[Features_Num_0.4_0.6[6][[1]], c(1,7)],
basis_df[Features_Num_0.4_0.6[7][[1]], c(1,8)],
basis_df[Features_Num_0.4_0.6[8][[1]], c(1,9)],
basis_df[Features_Num_0.4_0.6[9][[1]], c(1,10)],
basis_df[Features_Num_0.4_0.6[10][[1]], c(1,11)],
basis_df[Features_Num_0.4_0.6[11][[1]], c(1,12)],
basis_df[Features_Num_0.4_0.6[12][[1]], c(1,13)],
basis_df[Features_Num_0.4_0.6[13][[1]], c(1,14)],
basis_df[Features_Num_0.4_0.6[14][[1]], c(1,15)],
basis_df[Features_Num_0.4_0.6[15][[1]], c(1,16)])
Features_0.4_0.6_long <- Features_0.4_0.6 %>%
pivot_longer(cols = 2:16)
Features_0.4_0.6_long <- Features_0.4_0.6_long[!(is.na(Features_0.4_0.6_long$value)), ]
Features_0.4_0.6_long$Threshold <- "0.4_0.6"
# Threshold greater between 0.2 to 0.4
Features_0.2_0.4 <- combine(basis_df[Features_Num_0.2_0.4[1][[1]], c(1,2)],
basis_df[Features_Num_0.2_0.4[2][[1]], c(1,3)],
basis_df[Features_Num_0.2_0.4[3][[1]], c(1,4)],
basis_df[Features_Num_0.2_0.4[4][[1]], c(1,5)],
basis_df[Features_Num_0.2_0.4[5][[1]], c(1,6)],
basis_df[Features_Num_0.2_0.4[6][[1]], c(1,7)],
basis_df[Features_Num_0.2_0.4[7][[1]], c(1,8)],
basis_df[Features_Num_0.2_0.4[8][[1]], c(1,9)],
basis_df[Features_Num_0.2_0.4[9][[1]], c(1,10)],
basis_df[Features_Num_0.2_0.4[10][[1]], c(1,11)],
basis_df[Features_Num_0.2_0.4[11][[1]], c(1,12)],
basis_df[Features_Num_0.2_0.4[12][[1]], c(1,13)],
basis_df[Features_Num_0.2_0.4[13][[1]], c(1,14)],
basis_df[Features_Num_0.2_0.4[14][[1]], c(1,15)],
basis_df[Features_Num_0.2_0.4[15][[1]], c(1,16)])
Features_0.2_0.4_long <- Features_0.2_0.4 %>%
pivot_longer(cols = 2:16)
Features_0.2_0.4_long <- Features_0.2_0.4_long[!(is.na(Features_0.2_0.4_long$value)), ]
Features_0.2_0.4_long$Threshold <- "0.2_0.4"
# Threshold greater between 0.0 to 0.2
Features_0.0_0.2 <- combine(basis_df[Features_Num_0.0_0.2[1][[1]], c(1,2)],
basis_df[Features_Num_0.0_0.2[2][[1]], c(1,3)],
basis_df[Features_Num_0.0_0.2[3][[1]], c(1,4)],
basis_df[Features_Num_0.0_0.2[4][[1]], c(1,5)],
basis_df[Features_Num_0.0_0.2[5][[1]], c(1,6)],
basis_df[Features_Num_0.0_0.2[6][[1]], c(1,7)],
basis_df[Features_Num_0.0_0.2[7][[1]], c(1,8)],
basis_df[Features_Num_0.0_0.2[8][[1]], c(1,9)],
basis_df[Features_Num_0.0_0.2[9][[1]], c(1,10)],
basis_df[Features_Num_0.0_0.2[10][[1]], c(1,11)],
basis_df[Features_Num_0.0_0.2[11][[1]], c(1,12)],
basis_df[Features_Num_0.0_0.2[12][[1]], c(1,13)],
basis_df[Features_Num_0.0_0.2[13][[1]], c(1,14)],
basis_df[Features_Num_0.0_0.2[14][[1]], c(1,15)],
basis_df[Features_Num_0.0_0.2[15][[1]], c(1,16)])
Features_0.0_0.2_long <- Features_0.0_0.2 %>%
pivot_longer(cols = 2:16)
Features_0.0_0.2_long <- Features_0.0_0.2_long[!(is.na(Features_0.0_0.2_long$value)), ]
Features_0.0_0.2_long$Threshold <- "0.0_0.2"
# Combine all threshold into one dataset
Features_threshold_all <- rbind(Features_0.8_long, Features_0.6_0.8_long, Features_0.4_0.6_long,
Features_0.2_0.4_long, Features_0.0_0.2_long)
names(Features_threshold_all) <- c("Benefit", "Basis", "Coef", "Threshold")
length_rank_0.8 <- data.frame()
length_rank_0.6_0.8 <- data.frame()
length_rank_0.4_0.6 <- data.frame()
length_rank_0.2_0.4 <- data.frame()
length_rank_0.0_0.2 <- data.frame()
for (i in 1:7){
length_rank_0.8 <- rbind(length_rank_0.8, length(Features_Num_0.8[[i]]))
}
for (i in 1:7){
length_rank_0.6_0.8 <- rbind(length_rank_0.6_0.8, length(Features_Num_0.6_0.8[[i]]))
}
for (i in 1:7){
length_rank_0.4_0.6 <- rbind(length_rank_0.4_0.6, length(Features_Num_0.4_0.6[[i]]))
}
for (i in 1:7){
length_rank_0.2_0.4 <- rbind(length_rank_0.2_0.4, length(Features_Num_0.2_0.4[[i]]))
}
for (i in 1:7){
length_rank_0.0_0.2 <- rbind(length_rank_0.0_0.2, length(Features_Num_0.0_0.2[[i]]))
}
length_rank <- cbind(length_rank_0.8, length_rank_0.6_0.8, length_rank_0.4_0.6,
length_rank_0.2_0.4, length_rank_0.0_0.2)
length_rank <- cbind(rownames(length_rank), length_rank)
rownames(length_rank) <- NULL
colnames(length_rank)<- c("Rank","Length_Rank_0.8", "Length_Rank_0.6_0.8", "Length_Rank_0.4_0.6",
"Length_Rank_0.2_0.4", "Length_Rank_0.0_0.2")
length_rank_t <- data.frame(t(length_rank))
length_rank_t <- length_rank_t[-1,]
length_rank_t <- cbind(rownames(length_rank_t), length_rank_t)
rownames(length_rank_t) <- NULL
colnames(length_rank_t)<- c("Threshold","Basis_1", "Basis_2", "Basis_3", "Basis_4", "Basis_5", "Basis_6", "Basis_7")
length_rank_long <- length_rank_t %>%
pivot_longer(cols = 2:8)
colnames(length_rank_long)<- c("Threshold", "Basis", "Count")
length_rank_long$Count <- as.integer(length_rank_long$Count)
Features_threshold_combine<- Features_threshold_all %>%
arrange(desc(Coef)) %>%
group_by(Basis, Threshold) %>%
mutate(count=n()) %>%
mutate(Benefits_all= paste(Benefit, collapse = ", ")) %>%
subset(select = -c(1,3)) %>%
unique()
Features_threshold_final <- cbind(Features_threshold_combine,
str_split_fixed(Features_threshold_combine$Benefits_all, ", ", n=max(Features_threshold_combine$count)))
index <- list(names(Features_threshold_final)[5:(max(Features_threshold_combine$count)+4)])
plot_ly(Features_threshold_final,
x = ~Basis,
y = ~count,
color = ~Threshold,
type = "bar",
hovertext = ~paste(...5, ...6, ...7, ...8, ...9, ...10, ...11, ...12, ...13, ...14, ...15, ...16, ...17,
...18, ...19, ...20, ...21, ...22, ...23, ...24, ...25, ...26, ...27, ...28, ...29, ...30,
...31, ...32, ...33, ...34, ...35, ...36, ...37, ...38, ...39, ...40, ...41, ...42, ...43,
...44, ...45, ...46, ...47, ...48, ...49, ...50, ...51, ...52, ...53, ...54, ...55, ...56,
...57, ...58, ...59, ...60, ...61, ...62, ...63, ...64, ...65, ...66, ...67, ...68, ...69,
...70, ...71, ...72, ...73, ...74, ...75, ...76, ...77, ...78, ...79, ...80, ...81, ...82,
...83, ...84, ...85, ...86, ...87, ...88, ...89, ...90, ...91, ...92, ...93, ...94, ...95,
...96, ...97, ...98, ...99, ...100, ...101, ...102, ...103, ...104, ...105, sep = "\n")) %>%
layout(yaxis = list(title = 'Count of Benefits'), barmode = "stack",
title="Benefits in Each Basis Factor Ordered by Threshold",
hoverlabel = list(font = list(family = "Sitka Small", size = 12, color = "Black")))
# Combine all threshold into one dataset
Features_threshold_all_2 <- rbind(Features_0.8_long, Features_0.6_0.8_long, Features_0.4_0.6_long,
Features_0.2_0.4_long)
names(Features_threshold_all_2) <- c("Benefit", "Basis", "Coef", "Threshold")
# Create a new dataset (1. all benefit separated by group; 2. add column of count)
Features_threshold_combine_2 <- Features_threshold_all_2 %>%
arrange(desc(Coef)) %>%
group_by(Basis, Threshold) %>%
dplyr::mutate(count=n()) %>%
dplyr::mutate(Benefits_all= paste(Benefit, collapse = ", ")) %>%
subset(select = -c(1,3)) %>%
unique()
Features_threshold_final_2 <- cbind(Features_threshold_combine_2,
str_split_fixed(Features_threshold_combine_2$Benefits_all, ", ", n=max(Features_threshold_combine_2$count)))
## New names:
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
index <- list(names(Features_threshold_final_2)[5:(max(Features_threshold_combine_2$count)+4)])
plot_ly(Features_threshold_final_2,
x = ~Basis,
y = ~count,
color = ~Threshold,
# colors = 'Blues',
type = "bar",
hovertext = ~paste(...5, ...6, ...7, ...8, ...9, ...10, ...11, ...12, ...13, ...14, ...15, ...16, ...17,
sep = "\n")) %>%
layout(xaxis = list(title = "Meta Benefits", categoryorder = "array",
categoryarray = c("Basis_1", "Basis_2", "Basis_3", "Basis_4", "Basis_5",
"Basis_6", "Basis_7", "Basis_8", "Basis_9", "Basis_10",
"Basis_11", "Basis_12", "Basis_13", "Basis_14", "Basis_15")),
yaxis = list(title = 'Count of Benefits'), barmode = "stack",
title="Benefits in Each Basis Factor Ordered by Threshold",
hoverlabel = list(font = list(family = "Sitka Small", size = 12, color = "black")))